Date: Tue Mar 24 12:26:08 2020
Scientist: Ran Yin
Sequencing (Waksman): Dibyendu Kumar
Statistics: Davit Sargsyan
Principal Investigator: Ah-Ng Kong

# Taxonomic Ranks:
# **K**ing **P**hillip **C**an n**O**t **F**ind **G**reen **S**ocks
# * Kingdom                
# * Phylum                    
# * Class                   
# * Order                   
# * Family     
# * Genus     
# * Species  
options(stringsAsFactors = FALSE,
        scipen = 999)
# # Increase mmemory size to 64 Gb----
# invisible(utils::memory.limit(65536))
# str(knitr::opts_chunk$get())
# # NOTE: the below does not work!
# knitr::opts_chunk$set(echo = FALSE, 
#                       message = FALSE,
#                       warning = FALSE,
#                       error = FALSE)
# require(knitr)
# require(kableExtra)
# require(shiny)
require(phyloseq)
require(data.table)
require(ggplot2)
require(plotly)
require(DT)
source("source/functions_may2019.R")
# On Windows set multithread=FALSE----
mt <- TRUE

1 Introduction

C57BL/6 wild-type (WT) and Nrf-2 double-knock-out (KO -/-) mice were given 2-week microbiome stabilization process using AIN93M diet and 8 more weeks to treat with either AIN93M or AIN93M 5% PEITC diet. Fecal samples were collected weekly, immediately frozen in liquid nitrogen and stored at -80oC. Serum, cecal, colon epithelial and whole colon tissues at week 10 were also collected for further analyses. Baseline, week 1 and 4 fecal samples were selected for 16s rRNA sequencing.

This document examines results from the WT mice samples.

We will attampt to answer the following questions:
1. Did microbiome change over time?
2. Was microbiome affected by diet?
3. Was there a difference between the KO and WT?
4. If there was a change in microbiome composition, what functional changes did it carry? What are the essential functions of the bacteria affected by the treatment and how can this be shown in vivo (metabolites, inflammation markers, etc.)?

2 Data preprocessing

2.1 Raw Data

FastQ files were downloaded from Dr. Kumar’s DropBox. A total of 60 files (2 per sample, pair-ended) and 2 metadata files were downloaded.

2.2 Script

This script (nrf2ubiome_dada2_may2019_v1.Rmd) was developed using DADA2 Pipeline Tutorial (1.12) with tips and tricks from the University of Maryland Shool of Medicine Institute for Genome Sciences (IGS) Microbiome Analysis Workshop (April 8-11, 2019). The output of the DADA2 script (data_may2019/ps_may2019.RData) is explored in this document.

3 Meta data: sample description

# Load data----
# Counts
load("data_may2019/ps_may2019.RData")
# Taxonomy
load("data_may2019/taxa.RData")
taxa <- data.table(seq16s = rownames(taxa),
                   taxa)
# Samples
samples <- ps_may2019@sam_data
datatable(samples,
              options = list(pageLength = nrow(samples)))

4 Prune data

The OTUs were mapped to Bacteria (98.34%), Eukaryota (1.43%) and undefined (0.23%) kingdoms.

The total of 8,129 unique sequences were found. Out of those, 7,994 were mapped to bacterial genomes.

dim(ps_may2019@otu_table@.Data)
[1]   30 8129
# Remove OTU unmapped to Bacteria
ps0 <- subset_taxa(ps_may2019, 
                   Kingdom == "Bacteria")
dim(ps0@otu_table@.Data)
[1]   30 7994

These belonged to 13 Phylum. 230 of the OTUs (or 2.88% of bacterial OTUs) could not be mapped to a Phyla and were removed from this analysis (with 7,764 OTUs left).

t2 <- data.table(table(tax_table(ps0)[, "Phylum"],
                                  exclude = NULL))
t2$V1[is.na(t2$V1)] <- "Unknown"
setorder(t2, -N)
t2[, pct := N/sum(N)]
setorder(t2, -N)
colnames(t2) <- c("Phylum",
                  "Number of OTUs",
                  "Percent of OTUs")
datatable(t2,
          rownames = FALSE,
          caption = "Number of Bacterial OTUs by Phylum",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = nrow(t2))) %>%
  formatCurrency(columns = 2,
                 currency = "",
                 mark = ",",
                 digits = 0) %>%
  formatPercentage(columns = 3,
                   digits = 2)

ps1 <- subset_taxa(ps0, 
                   !is.na(Phylum))
dim(ps1@otu_table@.Data)
[1]   30 7764

5 OTU table (first 10 rows)

6 Total counts per sample (i.e. sequencing depth)

tmp <- copy(smpl)
tmp$WEEK <- factor(tmp$WEEK,
                    levels = c("week 0",
                               "week 1",
                               "week 4"),
                    labels = c("Week 0",
                               "Week 1",
                               "Week 4"))

tmp$TREATMENT <- factor(tmp$TREATMENT,
                        levels = c("Control",
                                   "PEITC"),
                        labels = c("AIN93M",
                                   "PEITC"))
p1 <- ggplot(tmp,
             aes(x = SAMPLE_NAME,
                 y = Total,
                 group = TREATMENT,
                 fill = TREATMENT)) +
  facet_wrap(~ WEEK,
             scale = "free_x") +
  geom_bar(stat = "identity",
           color = "black") +
  scale_x_discrete("") +
  scale_y_continuous("Number of Reads") +
  scale_fill_grey("Treatment", 
                  start = 0.2, 
                  end = 0.8,
                  na.value = "red",
                  aesthetics = "fill") +
  theme_bw() + 
  theme(panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        legend.position = "top")

tiff(filename = "tmp/seq_depth_may2019.tiff",
     height = 4,
     width = 6,
     units = "in",
     res = 600,
     compression = "lzw+p")
print(p1)
graphics.off()

print(p1)

7 Richness (Alpha diversity)

Shannon’s diversity index was calculated for each sample and ploted over time using the 7,764 from the 13 Phylum above.

ps0@sam_data$Diet_Week <- paste(samples$TREATMENT,
                                samples$WEEK,
                                sep = "_")
ps0@sam_data$ID <- substr(x = ps0@sam_data$SAMPLE_NAME,
                          start = 2,
                          stop = 3)
p1 <- plot_richness(ps0,
                    x = "Diet_Week", 
                    measures = "Shannon") +
    geom_line(aes(group = ID),
            color = "black") +
  geom_point(aes(fill = ID),
             shape = 21,
             size = 3,
             color = "black") +
  scale_x_discrete("") +
  theme(axis.text.x = element_text(angle = 30,
                                   hjust = 1,
                                   vjust = 1))

ggplotly(p = p1,
         tooltip = c("ID",
                     "value"))

p1 <- p1 + theme(legend.position = "none")

tiff(filename = "tmp/wt_shannon.tiff",
     height = 4,
     width = 5,
     units = "in",
     res = 600,
     compression = "lzw+p")
print(p1)
graphics.off()
tmp <- data.table(p1$data)
tmp[, delta := value[WEEK == "week 4"] - 
      value[WEEK == "week 0"],
    by = ID]

tmp1 <- droplevels(unique(tmp[WEEK %in% c("week 0",
                                          "week 4"), 
                              c("value",
                                "WEEK",
                                "TREATMENT")]))
m1 <- lm(value ~ WEEK*TREATMENT,
         data = tmp1)
print("Model1: Shannon's index by treatment and time")
anova(m1)

tmp2 <- droplevels(unique(tmp[WEEK == "week 0", 
                              c("delta",
                                "value",
                                "TREATMENT")]))
m2 <- lm(delta ~ TREATMENT,
         data = tmp2)
print("Model2: Shannon's index differences (Week 4 - Week 0) by treatment")
anova(m2)

m3 <- wilcox.test(delta ~ TREATMENT,
            data = tmp2)
print("Model3: Wilcoxon test for Shannon's index differences (Week 4 - Week 0) by treatment")
m3

The results showed that in 4 out of 5 animals in the PEITC group diversity increased over the 4 weeks compared to the baseline. At the same time, only 2 out of 5 Control animals increased its microbial diversity in the same period of time. NOTE: there was no statistically significant difference between the treatment groups as well as over time (see results above).

8 Counts at Phylum level

9 Relative abundance (%) at Phylum level

Remove phyla with relative abundance of >= 1% in less than 10% of samples.

t1 <- data.table(Phylum = ra_p$Phylum,
                 `Number of Samples` = rowSums(ra_p[, 2:ncol(ra_p)] >= 0.01))
t1$`Percent Samples` <-  t1$`Number of Samples`/30

setorder(t1, -`Number of Samples`)
datatable(t1,
          rownames = FALSE,
          caption = "Taxonomic  count table",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = nrow(t1))) %>%
  formatPercentage(columns = 3,
                   digits = 1)

Hence, only 6 out of 13 Phyla were studied in this analysis: Actinobacteria, Bacteroidetes, Firmicutes, Proteobacteria, Tenericutes and Verrucomicrobia. Relative abundance at the next taxonomic level (Class) was, therefore, computed relative to the sum of these 6 Phyla.

7,628 OTUs, down from 7,764 OTUs in the previous table.

10 Relative Abundance in Samples at Different Taxonomic Ranks

10.1 1. Class

p0 <- ggplot(mu,
             aes(x = Week,
                 y = x,
                 group = Treatment)) +
  facet_wrap(~ Class,
             scale = "free_y") +
  geom_line() +
  geom_point(aes(shape = Treatment,
                 color = Treatment),
             size = 3,
             alpha = 0.5) +
  scale_x_discrete("") +
  scale_y_continuous("Relative Abundance (%)") +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 45,
                                   hjust = 1))

tiff(filename = "tmp/wt_class_over_time.tiff",
     height = 5,
     width = 7,
     units = "in",
     res = 600,
     compression = "lzw+p")
print(p0)
graphics.off()

print(p0)
p1 <- ggplot(mu,
             aes(x = x,
                 y = Class,
                 color = Treatment,
                 shape = Week)) +
  geom_point(size = 3,
             alpha = 0.5) +
  geom_vline(xintercept = 1,
             linetype = "dashed") +
  scale_x_continuous("Relative Abundance (%)") +
  theme(legend.position = "top")

tiff(filename = "tmp/wt_class_ra.tiff",
     height = 4,
     width = 7,
     units = "in",
     res = 600,
     compression = "lzw+p")
print(p1)
graphics.off()

ggplotly(p1)

10.2 2. Order

p0 <- ggplot(mu,
             aes(x = Week,
                 y = x,
                 group = Treatment)) +
  facet_wrap(~ Order,
             scale = "free_y") +
  geom_line() +
  geom_point(aes(shape = Treatment,
                 color = Treatment),
             size = 5,
             alpha = 0.5)
print(p0)
p1 <- ggplot(mu,
             aes(x = x,
                 y = Order,
                 fill = Treatment,
                 shape = Week)) +
  # facet_wrap(~ Sex, nrow = 1) +
  geom_point(size = 3,
             alpha = 0.5) +
  geom_vline(xintercept = 1,
             linetype = "dashed") +
  scale_x_continuous("Relative Abundance (%)")
ggplotly(p1)

11 Session Information

sessionInfo()
---
title: "Nrf2 BL6 Wild-Type (WT) PEITC 16S Microbiome Data Visualization"
output: 
  html_notebook:
    toc: yes
    toc_float: yes
    number_sections: yes
    code_folding: hide
---
Date: `r date()`     
Scientist: [Ran Yin](mailto:ry147@scarletmail.rutgers.edu)      
Sequencing (Waksman): [Dibyendu Kumar](mailto:dk@waksman.rutgers.edu)      
Statistics: [Davit Sargsyan](mailto:sargdavid@gmail.com)      
Principal Investigator: [Ah-Ng Kong](mailto:kongt@pharmacy.rutgers.edu) 

```{}
# Taxonomic Ranks:
# **K**ing **P**hillip **C**an n**O**t **F**ind **G**reen **S**ocks
# * Kingdom                
# * Phylum                    
# * Class                   
# * Order                   
# * Family     
# * Genus     
# * Species  
```

```{r setup}
options(stringsAsFactors = FALSE,
        scipen = 999)

# # Increase mmemory size to 64 Gb----
# invisible(utils::memory.limit(65536))


# str(knitr::opts_chunk$get())
# # NOTE: the below does not work!
# knitr::opts_chunk$set(echo = FALSE, 
#                       message = FALSE,
#                       warning = FALSE,
#                       error = FALSE)

# require(knitr)
# require(kableExtra)
# require(shiny)

require(phyloseq)
require(data.table)
require(ggplot2)
require(plotly)
require(DT)

source("source/functions_may2019.R")

# On Windows set multithread=FALSE----
mt <- TRUE
```

# Introduction
C57BL/6 wild-type (WT) and Nrf-2 double-knock-out (KO -/-) mice were given 2-week microbiome stabilization process using AIN93M diet and 8 more weeks to treat with either AIN93M or AIN93M 5% PEITC diet. Fecal samples were collected weekly, immediately frozen in liquid nitrogen and stored at -80^o^C. Serum, cecal, colon epithelial and whole colon tissues at week 10 were also collected for further analyses. Baseline, week 1 and 4 fecal samples were selected for 16s rRNA sequencing.  
  
This document examines results from the WT mice samples.  
  
We will attampt to answer the following questions:  
1. Did microbiome change over time?  
2. Was microbiome affected by diet?  
3. Was there a difference between the KO and WT?  
4. If there was a change in microbiome composition, what functional changes did it carry? What are the essential functions of the bacteria affected by the treatment and how can this be shown in vivo (metabolites, inflammation markers, etc.)?

# Data preprocessing
## Raw Data 
FastQ files were downloaded from [Dr. Kumar's DropBox](https://www.dropbox.com/sh/sm9tinm0f5r6y1v/AADjGPRRNiIM7zMSfANDkQjFa?dl=0). A total of 60 files (2 per sample, pair-ended) and 2 metadata files were downloaded.

## Script
This script (***nrf2ubiome_dada2_may2019_v1.Rmd***) was developed using [DADA2 Pipeline Tutorial (1.12)](https://benjjneb.github.io/dada2/tutorial.html) with tips and tricks from the [University of Maryland Shool of Medicine Institute for Genome Sciences (IGS)](http://www.igs.umaryland.edu/) [Microbiome Analysis Workshop (April 8-11, 2019)](http://www.igs.umaryland.edu/education/wkshp_metagenome.php). The output of the DADA2 script (***data_may2019/ps_may2019.RData***) is explored in this document.

# Meta data: sample description
```{r data}
# Load data----
# Counts
load("data_may2019/ps_may2019.RData")

# Taxonomy
load("data_may2019/taxa.RData")
taxa <- data.table(seq16s = rownames(taxa),
                   taxa)

# Samples
samples <- ps_may2019@sam_data
datatable(samples,
              options = list(pageLength = nrow(samples)))
```

# Prune data
The OTUs were mapped to Bacteria (98.34%), Eukaryota (1.43%) and undefined (0.23%) kingdoms. 

```{r check_mapping_kingdom, warning = FALSE, echo = FALSE, message = FALSE}
t1 <- data.table(table(tax_table(ps_may2019)[,
                                             "Kingdom"],
                       exclude = NULL))
t1$V1[is.na(t1$V1)] <- "Unknown"

t1[, pct := N/sum(N)]
setorder(t1, -N)

colnames(t1) <- c("Kingdom",
                  "Number of OTUs",
                  "Percent of OTUs")
datatable(t1,
          rownames = FALSE,
          caption = "Number of OTUs by Kingdom",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = nrow(t1))) %>%
  formatCurrency(columns = 2,
                 currency = "",
                 mark = ",",
                 digits = 0) %>%
  formatPercentage(columns = 3,
                   digits = 2)
```

The total of 8,129 unique sequences were found. Out of those, 7,994 were mapped to bacterial genomes. 

```{r keep_bacteria}
dim(ps_may2019@otu_table@.Data)

# Remove OTU unmapped to Bacteria
ps0 <- subset_taxa(ps_may2019, 
                   Kingdom == "Bacteria")
dim(ps0@otu_table@.Data)
```
  
These belonged to 13 Phylum. 230 of the OTUs (or 2.88% of bacterial OTUs) could not be mapped to a Phyla and were removed from this analysis (with 7,764 OTUs left).

```{r phylum_mapping}
t2 <- data.table(table(tax_table(ps0)[, "Phylum"],
                                  exclude = NULL))
t2$V1[is.na(t2$V1)] <- "Unknown"
setorder(t2, -N)
t2[, pct := N/sum(N)]
setorder(t2, -N)

colnames(t2) <- c("Phylum",
                  "Number of OTUs",
                  "Percent of OTUs")

datatable(t2,
          rownames = FALSE,
          caption = "Number of Bacterial OTUs by Phylum",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = nrow(t2))) %>%
  formatCurrency(columns = 2,
                 currency = "",
                 mark = ",",
                 digits = 0) %>%
  formatPercentage(columns = 3,
                   digits = 2)

ps1 <- subset_taxa(ps0, 
                   !is.na(Phylum))
dim(ps1@otu_table@.Data)
```

# OTU table (first 10 rows)
```{r otu_table, warning=FALSE,echo=FALSE,message=FALSE}
otu <- data.table(ps0@tax_table@.Data,
                  t(ps0@otu_table@.Data))
datatable(head(otu, 10),
          rownames = FALSE,
          caption = "Taxonomic  count table",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = 10)) %>%
  formatCurrency(columns = 7:36,
                 currency = "",
                 mark = ",",
                 digits = 0)
```

# Total counts per sample (i.e. sequencing depth)
```{r Tax, warning=FALSE,echo=FALSE,message=FALSE,fig.width=10,fig.height=5}
t1 <- colSums(otu[, 7:ncol(otu)])
t1 <- data.table(SAMPLE_NAME = names(t1),
                 Total = t1)

tmp <- data.table(SAMPLE_NAME = samples$SAMPLE_NAME,
                  TREATMENT = samples$TREATMENT,
                  WEEK = samples$WEEK)

smpl <- merge(tmp,
            t1,
            by = "SAMPLE_NAME")

p1 <- ggplot(smpl,
             aes(x = SAMPLE_NAME,
                 y = Total,
                 fill = TREATMENT,
                 colour = WEEK)) +
  geom_bar(stat = "identity") +
  scale_x_discrete("Sample Name") +
  scale_y_continuous("Number of Reads") +
  scale_fill_discrete("Group") +
  theme(axis.text.x = element_text(angle = 45,
                                   hjust = 1)) 
ggplotly(p1)
```

```{r seq_depth_greyscale, , fig.width = 6, fig.height = 4}
tmp <- copy(smpl)
tmp$WEEK <- factor(tmp$WEEK,
                    levels = c("week 0",
                               "week 1",
                               "week 4"),
                    labels = c("Week 0",
                               "Week 1",
                               "Week 4"))

tmp$TREATMENT <- factor(tmp$TREATMENT,
                        levels = c("Control",
                                   "PEITC"),
                        labels = c("AIN93M",
                                   "PEITC"))
p1 <- ggplot(tmp,
             aes(x = SAMPLE_NAME,
                 y = Total,
                 group = TREATMENT,
                 fill = TREATMENT)) +
  facet_wrap(~ WEEK,
             scale = "free_x") +
  geom_bar(stat = "identity",
           color = "black") +
  scale_x_discrete("") +
  scale_y_continuous("Number of Reads") +
  scale_fill_grey("Treatment", 
                  start = 0.2, 
                  end = 0.8,
                  na.value = "red",
                  aesthetics = "fill") +
  theme_bw() + 
  theme(panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        legend.position = "top")

tiff(filename = "tmp/seq_depth_may2019.tiff",
     height = 4,
     width = 6,
     units = "in",
     res = 600,
     compression = "lzw+p")
print(p1)
graphics.off()

print(p1)
```

# Richness (Alpha diversity)
Shannon's diversity index was calculated for each sample and ploted over time using the 7,764 from the 13 Phylum above.

```{r richness, fig.width = 10,fig.height = 5}
ps0@sam_data$Diet_Week <- paste(samples$TREATMENT,
                                samples$WEEK,
                                sep = "_")
ps0@sam_data$ID <- substr(x = ps0@sam_data$SAMPLE_NAME,
                          start = 2,
                          stop = 3)
p1 <- plot_richness(ps0,
                    x = "Diet_Week", 
                    measures = "Shannon") +
    geom_line(aes(group = ID),
            color = "black") +
  geom_point(aes(fill = ID),
             shape = 21,
             size = 3,
             color = "black") +
  scale_x_discrete("") +
  theme(axis.text.x = element_text(angle = 30,
                                   hjust = 1,
                                   vjust = 1))

ggplotly(p = p1,
         tooltip = c("ID",
                     "value"))

p1 <- p1 + theme(legend.position = "none")

tiff(filename = "tmp/wt_shannon.tiff",
     height = 4,
     width = 5,
     units = "in",
     res = 600,
     compression = "lzw+p")
print(p1)
graphics.off()
```

```{r test_richness}
tmp <- data.table(p1$data)
tmp[, delta := value[WEEK == "week 4"] - 
      value[WEEK == "week 0"],
    by = ID]

tmp1 <- droplevels(unique(tmp[WEEK %in% c("week 0",
                                          "week 4"), 
                              c("value",
                                "WEEK",
                                "TREATMENT")]))
m1 <- lm(value ~ WEEK*TREATMENT,
         data = tmp1)
print("Model1: Shannon's index by treatment and time")
anova(m1)

tmp2 <- droplevels(unique(tmp[WEEK == "week 0", 
                              c("delta",
                                "value",
                                "TREATMENT")]))
m2 <- lm(delta ~ TREATMENT,
         data = tmp2)
print("Model2: Shannon's index differences (Week 4 - Week 0) by treatment")
anova(m2)

m3 <- wilcox.test(delta ~ TREATMENT,
            data = tmp2)
print("Model3: Wilcoxon test for Shannon's index differences (Week 4 - Week 0) by treatment")
m3
```
  
The results showed that in 4 out of 5 animals in the PEITC group diversity increased over the 4 weeks compared to the baseline. At the same time, only 2 out of 5 Control animals increased its microbial diversity in the same period of time. NOTE: there was no statistically significant difference between the treatment groups as well as over time (see results above).  
  
# Counts at Phylum level
```{r counts_p, warning=FALSE,echo=FALSE,message=FALSE}
counts_p <- counts_by_tax_rank(dt1 = otu,
                               aggr_by = "Phylum")
setorder(counts_p, -`0A1`)
datatable(counts_p,
          rownames = FALSE,
          caption = "Taxonomic  count table",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = nrow(counts_p))) %>%
  formatCurrency(columns = 2:ncol(counts_p),
                 currency = "",
                 mark = ",",
                 digits = 0)
```

# Relative abundance (%) at Phylum level
```{r ra_p, warning=FALSE,echo=FALSE,message=FALSE}
ra_p <- ra_by_tax_rank(counts = counts_p,
                       pct = FALSE,
                       digit = 4)

datatable(ra_p,
          rownames = FALSE,
          caption = "Taxonomic  count table",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = nrow(ra_p))) %>%
  formatPercentage(columns = 2:ncol(counts_p),
                   digits = 2)
```

Remove phyla with relative abundance of >= 1% in less than 10% of samples.

```{r prev_p}
t1 <- data.table(Phylum = ra_p$Phylum,
                 `Number of Samples` = rowSums(ra_p[, 2:ncol(ra_p)] >= 0.01))
t1$`Percent Samples` <-  t1$`Number of Samples`/30

setorder(t1, -`Number of Samples`)
datatable(t1,
          rownames = FALSE,
          caption = "Taxonomic  count table",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = nrow(t1))) %>%
  formatPercentage(columns = 3,
                   digits = 1)
```

Hence, only 6 out of 13 Phyla were studied in this analysis: Actinobacteria, Bacteroidetes, Firmicutes, Proteobacteria, Tenericutes and Verrucomicrobia. Relative abundance at the next taxonomic level (Class) was, therefore, computed relative to the sum of these 6 Phyla.

```{r keep_6_phyla, warning=FALSE,echo=FALSE,message=FALSE}
keep_p <- t1$Phylum[t1$`Percent Samples` >= 0.1]
paste0(keep_p, collapse = ", ")

ps1 <- subset_taxa(ps0, 
                   Phylum %in% keep_p )
otu1 <- data.table(ps1@tax_table@.Data,
                   t(ps1@otu_table@.Data))

datatable(head(otu1, 10),
          rownames = FALSE,
          caption = "Taxonomic  count table",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = 10)) %>%
  formatCurrency(columns = 7:ncol(otu1),
                 currency = "",
                 mark = ",",
                 digits = 0)
```

7,628 OTUs, down from 7,764 OTUs in the previous table.


# Relative Abundance in Samples at Different Taxonomic Ranks
## 1. Class
```{r counts_c, warning=FALSE,echo=FALSE,message=FALSE,fig.width=10,fig.height=6}
counts_c <- counts_by_tax_rank(dt1 = otu1,
                               aggr_by = "Class")
ra_c <- ra_by_tax_rank(counts_c)

tax.ranks <- unique(otu1[, c("Phylum",
                             "Class")])

ra_c <- merge(tax.ranks,
              ra_c,
              by = "Class")

total <- rowSums(ra_c[, 3:ncol(ra_c)])

ra_c$Class <- factor(ra_c$Class,
                     levels = ra_c$Class[order(total)])

ra_c$Phylum <- factor(ra_c$Phylum,
                      levels = unique(ra_c$Phylum[order(total)]))
tmp <- melt.data.table(data = ra_c,
                       id.vars = 1:2,
                       measure.vars = 3:ncol(counts_c),
                       variable.name = "SAMPLE_NAME",
                       value.name = "RA")

tmp <- merge(data.table(SAMPLE_NAME = samples$SAMPLE_NAME,
                        WEEK = samples$WEEK,
                        TREATMENT = samples$TREATMENT),
             tmp,
             by = "SAMPLE_NAME")

# Plot samples
p1 <- ggplot(tmp,
             aes(x = SAMPLE_NAME,
                 y = RA,
                 fill = Class,
                 color = Phylum)) +
  facet_wrap(~ WEEK + TREATMENT,
             scales = "free_x",
             nrow = 3) +
  geom_bar(stat = "identity") +
  scale_x_discrete("") +
  scale_y_continuous(expand = c(0, 0)) +
  theme(axis.text.x = element_text(angle = 0,
                                   hjust = 1))
ggplotly(p1)
```

```{r means_c, echo = FALSE, warning = FALSE, message = FALSE}
lra <- ra_melt(ra = ra_c,
               samples = samples,
               sample_name = "SAMPLE_NAME")

mu <- data.table(aggregate(lra$RA,
                           by = list(Week = lra$WEEK,
                                     Treatment = lra$TREATMENT,
                                     Class = lra$Class),
                           FUN = "mean"))
mu[, total := sum(x),
   by = "Class"]
ul <- unique(mu[, c("Class", 
                    "total")])
ul <- ul[order(total),]
mu$Class <- factor(mu$Class,
                   level = ul$Class)
mu$total <- NULL

datatable(mu,
          rownames = FALSE,
          caption = "Taxonomic  count table",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = nrow(mu),
                         order = list(list(3, 'desc'))))
```


```{r means_c_p0, fig.width = 7, fig.height = 5}
p0 <- ggplot(mu,
             aes(x = Week,
                 y = x,
                 group = Treatment)) +
  facet_wrap(~ Class,
             scale = "free_y") +
  geom_line() +
  geom_point(aes(shape = Treatment,
                 color = Treatment),
             size = 3,
             alpha = 0.5) +
  scale_x_discrete("") +
  scale_y_continuous("Relative Abundance (%)") +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 45,
                                   hjust = 1))

tiff(filename = "tmp/wt_class_over_time.tiff",
     height = 5,
     width = 7,
     units = "in",
     res = 600,
     compression = "lzw+p")
print(p0)
graphics.off()

print(p0)
```


```{r means_c_p1, fig.height = 5, fig.width = 6}
p1 <- ggplot(mu,
             aes(x = x,
                 y = Class,
                 color = Treatment,
                 shape = Week)) +
  geom_point(size = 3,
             alpha = 0.5) +
  geom_vline(xintercept = 1,
             linetype = "dashed") +
  scale_x_continuous("Relative Abundance (%)") +
  theme(legend.position = "top")

tiff(filename = "tmp/wt_class_ra.tiff",
     height = 4,
     width = 7,
     units = "in",
     res = 600,
     compression = "lzw+p")
print(p1)
graphics.off()

ggplotly(p1)
```

## 2. Order
```{r counts_o, warning=FALSE,echo=FALSE,message=FALSE,fig.width=10,fig.height=6}
counts_o <- counts_by_tax_rank(dt1 = otu1,
                               aggr_by = "Order")
ra_o <- ra_by_tax_rank(counts_o)

tax.ranks <- unique(otu1[, c("Phylum",
                             "Order")])

ra_o <- merge(tax.ranks,
              ra_o,
              by = "Order")

total <- rowSums(ra_o[, 3:ncol(ra_o)])

ra_o$Order <- factor(ra_o$Order,
                     levels = ra_o$Order[order(total)])

ra_o$Phylum <- factor(ra_o$Phylum,
                      levels = unique(ra_o$Phylum[order(total)]))
tmp <- melt.data.table(data = ra_o,
                       id.vars = 1:2,
                       measure.vars = 3:ncol(counts_o),
                       variable.name = "SAMPLE_NAME",
                       value.name = "RA")

tmp <- merge(data.table(SAMPLE_NAME = samples$SAMPLE_NAME,
                        WEEK = samples$WEEK,
                        TREATMENT = samples$TREATMENT),
             tmp,
             by = "SAMPLE_NAME")

# Plot samples
p1 <- ggplot(tmp,
             aes(x = SAMPLE_NAME,
                 y = RA,
                 fill = Order,
                 color = Phylum)) +
  facet_wrap(~ WEEK + TREATMENT,
             scales = "free_x",
             nrow = 3) +
  geom_bar(stat = "identity") +
  scale_x_discrete("") +
  scale_y_continuous(expand = c(0, 0)) +
  theme(axis.text.x = element_text(angle = 0,
                                   hjust = 1))
ggplotly(p1)
```

```{r means_o, echo = FALSE, warning = FALSE, message = FALSE}
lra <- ra_melt(ra = ra_o,
               samples = samples,
               sample_name = "SAMPLE_NAME")

mu <- data.table(aggregate(lra$RA,
                           by = list(Week = lra$WEEK,
                                     Treatment = lra$TREATMENT,
                                     Order = lra$Order),
                           FUN = "mean"))
mu[, total := sum(x),
   by = "Order"]
ul <- unique(mu[, c("Order", 
                    "total")])
ul <- ul[order(total),]
mu$Order <- factor(mu$Order,
                   level = ul$Order)
mu$total <- NULL

datatable(mu,
          rownames = FALSE,
          caption = "Taxonomic  count table",
          class = "cell-border stripe",
          options = list(search = FALSE,
                         pageLength = nrow(mu),
                         order = list(list(3, 'desc'))))
```

```{r means_o_p0, fig.width = 10, fig.height = 5}
p0 <- ggplot(mu,
             aes(x = Week,
                 y = x,
                 group = Treatment)) +
  facet_wrap(~ Order,
             scale = "free_y") +
  geom_line() +
  geom_point(aes(shape = Treatment,
                 color = Treatment),
             size = 5,
             alpha = 0.5)
print(p0)
```

```{r means_o_p1, fig.width = 10, fig.height = 10}
p1 <- ggplot(mu,
             aes(x = x,
                 y = Order,
                 fill = Treatment,
                 shape = Week)) +
  # facet_wrap(~ Sex, nrow = 1) +
  geom_point(size = 3,
             alpha = 0.5) +
  geom_vline(xintercept = 1,
             linetype = "dashed") +
  scale_x_continuous("Relative Abundance (%)")
ggplotly(p1)
```

# Session Information
```{r info,eval=TRUE}
sessionInfo()
```